knitr::opts_chunk$set(echo = TRUE)

Data input

rm(list=ls())
#loading packages
library(base)
library(dplyr)
library(tidyr)

# Changeable: site:KH or HC?,dataInput?
site <- "HC" # HC, KH, KH&HC|HC&KH
dataInput <-"C1D1" # 6 cases: C1D1, C2D1, C3D1, C1F1, C2F1, C3F1
#C1D2, C2D2, C3D2, C1D3, C2D3, C3D3, C1F2, C2F2, C3F2,C1F3, C2F3, C3F3

# setwd("~/Dropbox/1.SHARED_FOLDER/slides_June_2018/PMA_batch2To7_July2019/pop data/models/FOI/Constant")

##Results from different IS and infecting serotype infering models:
if ( dataInput == "C1D1"){
    data <- read.csv("~/Dropbox/1.SHARED_FOLDER/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_C1D1.csv")  
}

if ( dataInput == "C1D3"){
    data <- read.csv("~/Dropbox/1.SHARED_FOLDER/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_C1D3.csv")  
}

if ( dataInput == "C1F1"){
    data <- read.csv("~/Dropbox/1.SHARED_FOLDER/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_HomoAssigned_C1F1.csv")  
}
if ( dataInput == "C1F3"){
    data <- read.csv("~/Dropbox/1.SHARED_FOLDER/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_HomoAssigned_C1F3.csv")  
}

data$YEAR <- as.factor(data$YEAR)
data$predictedIS<- ordered(data$predictedIS,levels=c("Secondary","Primary","Neg"))
data$pred.serotype<- as.factor(data$pred.serotype)

pop<- data[which(!is.na(data$AGE_MIN)&is.na(data$PanbioUnit)),]# cross out acute and ELISA samples
pop$Site <- substr(pop$sampleID,1,2)

#Agegroup_ 1year
pop$AgeGroup <-factor(as.factor(floor(pop$Age)),labels=c("[1-2)","[2-3)","[3-4)","[4-5)","[5-6)","[6-7)","[7-8)","[8-9)","[9-10)","[10-11)","[11-12)","[12-13)","[13-14)","[14-15)","[15-16)","[16-17)","[17-18)","[18-19)","[19-20)","[20-21)","[21-22)","[22-23)","[23-24)","[24-25)","[25-26)","[26-27)","[27-28)","[28-29)","[29-30)","[30,31)"))# group as  1year 1 group.

#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
## Agegroup_ 2 years : 16groups

# pop$AgeGroup <-factor(as.factor(floor(pop$Age/2)+1),labels=c("[1-2)","[2-4)","[4-6)","[6-8)","[8-10)","[10-12)","[12-14)","[14-16)","[16-18)","[18-20)","[20-22)","[22-24)","[24-26)","[26-28)","[28-30)","[30-31)"))

#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
##Agegroup_3years : 11 groups

# pop$AgeGroup <- floor(pop$Age/3) + 1
# pop$AgeGroup[which(pop$AgeGroup == 11)] <- 10
# pop$AgeGroup <- factor(
#   pop$AgeGroup,
#   labels = c("[1-3)","[3-6)","[6-9)","[9-12)","[12-15)","[15-18)",
#              "[18-21)","[21-24)","[24-27)","[27-31)"))

reformat data

# spread(x,y): spread rows of col x into collums, then value of col y will fit in as rows.

#rename(new_name=old_name).

if (site=="HC"){
   p_year=dplyr::filter(pop,Site=="HC")%>%group_by(AgeGroup,YEAR,Site)%>%dplyr::count(predictedIS)%>%spread(predictedIS,n)}
if(site == "KH"){
    p_year=dplyr::filter(pop,Site=="KH")%>%group_by(AgeGroup,YEAR,Site)%>%dplyr::count(predictedIS)%>%spread(predictedIS,n)}
if(site == "KH&HC"|site == "HC&KH"){
    p_year=dplyr::filter(pop)%>%group_by(AgeGroup,YEAR,Site)%>%dplyr::count(predictedIS)%>%spread(predictedIS,n)}

# p_year$total <- apply(p_year[,3:8],1,sum,na.rm=T)

p_year$total <- apply(p_year[,4:6],1,sum,na.rm=T)
p_year <- p_year[order(p_year$YEAR),]

#create function for substr age rather than put age in order, this is useful in case of missing data in a specific age: 
my_substr <- function(s){
  if (nchar(s) == 5){
    t <- substr(s, 4, 4)
  }else{
    if (nchar(s) == 6){
      t <- substr(s, 4, 5)
    }else{
      t <- substr(s, 5, 6)
    }
  }
  return(t)
}

#
p_year$age<- apply(p_year[,1],1,my_substr)

# Assign NA as 0 because stan does not support NA value:

for (idx in 4:6){
  idx.na <- which(is.na(p_year[[idx]]))
  p_year[[idx]][idx.na] <- 0
}



# setnames(p_year, old=c("AgeGroup","1","2","3","4","Neg","Secondary","total"),new=c("AgeGroup","DV1.13","DV2.13","DV3.13","DV4.13","Neg.13","Sec.13","total.13"))

Combining all lamda from different model for comparison

FoI constant

rm(list = ls())
library(ggplot2)

site_vec <- c("HC","HC","HC","HC","KH","KH","KH","KH")
data_vec <- c("C1D1", "C1F1", "C1D3","C1F3","C1D1", "C1F1", "C1D3","C1F3")
g <- list()

for(i in (1:8)){
  site <- site_vec[i]
  dataInput <-data_vec[i]
  #Loading stored simulation results
  fit <- readRDS(paste("/home/phuong/pCloudDrive/PMA analysis results/FoI/RDS/FOI constant",site,dataInput,".rds"))# Loading the fit object (no need run over again)
  posterior<-rstan::extract(fit)

  p <- data.frame(lambda = posterior$lambda,
                  year = rep("FOI",length(posterior$lambda))) 

  p1 <- ggplot(p, aes(x=year,y=lambda))+
    geom_violin()+
    ggtitle(paste(site,dataInput))+
    coord_cartesian(ylim=c (0.0,0.03))+
    theme(plot.title = element_text(hjust=0.5,face="bold",size=12,colour = "blue"),
          axis.title.y = element_blank(),
          axis.title.x = element_blank(),
          axis.text.x = element_blank(),
          axis.text = element_text(face = "bold", angle = 0, hjust = 1))+
    # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
    # geom_boxplot(width=0.1)+# add median and quartile
    stat_summary(fun.data=mean_sdl,
                 geom="pointrange", color="red")#Add mean and standard deviation
  print(i)
  print(p1)
  g[[i]]<- p1#add each plot into plot list
}

library(gridExtra)
library(grid)
lamb <- grid.arrange(g[[1]],g[[2]],g[[3]],g[[4]],g[[5]],g[[6]],g[[7]],g[[8]],nrow=2,ncol=4,bottom=textGrob("Posterior lambda distribution_constant",gp=gpar(fontsize=20,font=8,col="dark blue")))

# ggsave(filename ="Posterior lambda distribution_constant.png",plot=lamb )
# ggsave(filename ="~/Desktop/PMA analysis results/FoI/Constant/lamda/Posterior lambda distribution_constant.png",plot=lamb )

Visualise the model results

Traceplot of estimating parameter: lambda

# # fit<- readRDS("/home/phuonght/pCloudDrive/slides_July2019/models/FOI/Constant/results/Fits/FOI constant HC C1D1 .rds")
# posterior<-rstan::extract(fit) # specify package rstan rather tham tidyr
# # plot to see whether estimated parameter is convergence or not: traceplot
# plottedRows <- 1 : nrow(posterior$lambda)
# plottedRows <- which(plottedRows %% 6 == 0)
# 
# stan_trace(fit, pars="lambda", inc_warmup = F)
# stan_plot(fit,pars = c("pneg","psec"),point_est = "median",show_density = FALSE,ci_level = 0.8, outer_level = 0.95,show_outer_line = TRUE, fill_color = "maroon")
# 
# stan_hist(fit,pars = "lambda")
# 
# stan_dens(fit,pars = "lambda",separate_chains = T)
# 
# stan_ac(fit,pars = "lambda", separate_chains = T)
# 
# stan_trace(fit,pars = "lambda")

Negative

# p_all_year <- data.frame(neg = as.numeric(),
#                          age = as.character(),
#                          year= as.character())
# 
# 
# for(a in 1:length(p_year$age)){
#   p_temp <- data.frame(neg = posterior$pneg[ , a],
#                        age = paste0(p_year$age[a]),
#                        year= paste0(p_year$YEAR[a]))
#   p_all_year <- rbind(p_all_year, p_temp)
# }
# 
# # relable factor level of age in order
# p_all_year$age <- as.numeric(as.character(p_all_year$age))
# p_all_year <- p_all_year[order(p_all_year$age),]
# p_all_year$age <- as.factor(p_all_year$age)
# 
# library(gridExtra)
# library(grid)
# 
# 
# list_year <- unique(p_all_year$year)
# for (idx.year in 1 : length(list_year)){
#   p <- dplyr::filter(p_all_year,year==list_year[idx.year]) # extract data for each year
# 
#   # length of data frame
#   x_max <- length(p$age)
# 
#   # violin plot
#   list.plot <- list()
# 
#   for (i in 1 : 6){# seperate 6 graphs for each year
#     list.plot[[i]] <- ggplot(p[(5*(i-1)*12000 + 1) : min((5*i*12000), x_max),], aes(x=age,y=neg)) +
#       geom_violin()+
#       theme(plot.subtitle = element_text(hjust=0.5),
#             axis.title = element_blank(),
#             axis.text = element_text(face = "bold"))+
#       # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
#       # geom_boxplot(width=0.1)+# add median and quartile
#       stat_summary(fun.data=mean_sdl,
#                    geom="pointrange", color="red")+#Add mean and sta
#       coord_cartesian(ylim=c(0, 1))
#   }
# 
# 
#   grid.arrange(list.plot[[1]],list.plot[[2]],list.plot[[3]],list.plot[[4]],list.plot[[5]], list.plot[[6]], ncol=3,nrow=2, top =textGrob(paste("posterior distribution of neg_ FOI constant",site,p$year[1]),gp=gpar(fontsize=20,col = "blue")))
# 
# }

Primary

# p_all_year <- data.frame(prim = as.numeric(),
#                          age = as.character(),
#                          year= as.character())
# 
# 
# for(a in 1:b){
#   p_temp <- data.frame(prim = posterior$pprimsero[ , a],
#                        age = paste0(p_year$age[a]),
#                        year= paste0(p_year$YEAR[a]))
#   p_all_year <- rbind(p_all_year, p_temp)
# }
# 
# # relable factor level of age in order
# p_all_year$age <- as.numeric(as.character(p_all_year$age))
# p_all_year <- p_all_year[order(p_all_year$age),]
# p_all_year$age <- as.factor(p_all_year$age)
# 
# library(gridExtra)
# library(grid)
# 
# 
# list_year <- unique(p_all_year$year)
# for (idx.year in 1 : length(list_year)){
#   p <- dplyr::filter(p_all_year,year==list_year[idx.year]) # extract data for each year
# 
#   # length of data frame
#   x_max <- length(p$age)
# 
#   # violin plot
#   list.plot <- list()
# 
#   for (i in 1 : 6){# seperate 6 graphs for each year
#     list.plot[[i]] <- ggplot(p[(5*(i-1)*12000 + 1) : min((5*i*12000), x_max),], aes(x=age,y=prim)) +
#       geom_violin()+
#       theme(plot.subtitle = element_text(hjust=0.5),
#             axis.title = element_blank(),
#             axis.text = element_text(face = "bold"))+
#       # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
#       # geom_boxplot(width=0.1)+# add median and quartile
#       stat_summary(fun.data=mean_sdl,
#                    geom="pointrange", color="red")+#Add mean and sta
#       coord_cartesian(ylim=c(0, 1))
#   }
# 
# 
# 
#   grid.arrange(list.plot[[1]],list.plot[[2]],list.plot[[3]],list.plot[[4]],list.plot[[5]], list.plot[[6]], ncol=3,nrow=2, top =textGrob(paste("posterior distribution of primary_ FOI constant",site,p$year[1]),gp=gpar(fontsize=20,col = "blue")))
# 
# }

Secondary

# p_all_year <- data.frame(sec = as.numeric(),
#                          age = as.character(),
#                          year= as.character())
# 
# 
# for(a in 1:b){
#   p_temp <- data.frame(sec = posterior$psec[ , a],
#                        age = paste0(p_year$age[a]),
#                        year= paste0(p_year$YEAR[a]))
#   p_all_year <- rbind(p_all_year, p_temp)
# }
# 
# # relable factor level of age in order
# p_all_year$age <- as.numeric(as.character(p_all_year$age))
# p_all_year <- p_all_year[order(p_all_year$age),]
# p_all_year$age <- as.factor(p_all_year$age)
# 
# library(gridExtra)
# library(grid)
# 
# 
# list_year <- unique(p_all_year$year)
# for (idx.year in 1 : length(list_year)){
#   p <- dplyr::filter(p_all_year,year==list_year[idx.year]) # extract data for each year
# 
#   # length of data frame
#   x_max <- length(p$age)
# 
#   # violin plot
#   list.plot <- list()
# 
#   for (i in 1 : 6){# seperate 6 graphs for each year
#     list.plot[[i]] <- ggplot(p[(5*(i-1)*12000 + 1) : min((5*i*12000), x_max),],   aes(x=age,y=sec)) +
#       geom_violin()+
#       theme(plot.subtitle = element_text(hjust=0.5),
#             axis.title = element_blank(),
#             axis.text = element_text(face = "bold"))+
#       # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
#       # geom_boxplot(width=0.1)+# add median and quartile
#       stat_summary(fun.data=mean_sdl,
#                    geom="pointrange", color="red")+#Add mean and sta
#       coord_cartesian(ylim=c(0, 1))
#   }
# 
# 
# 
#   grid.arrange(list.plot[[1]],list.plot[[2]],list.plot[[3]],list.plot[[4]],list.plot[[5]], list.plot[[6]], ncol=3,nrow=2, top =textGrob(paste("posterior distribution of secondary_ FOI constant",site,p$year[1]),gp=gpar(fontsize=20,col = "blue")))
# 
# }

Estimated proportions within 95% CI

fit <- readRDS(paste("/home/phuong/pCloudDrive/PMA analysis results/FoI/RDS/FOI constant",site,dataInput,".rds"))# Loading the fit object (no need run over again)
posterior<-rstan::extract(fit)
# p <- data.frame(lambda = posterior$lambda,
#                   year = rep("FOI",length(posterior$lambda)))
foisummary <- summary(fit)

pest.all<- data.frame(foisummary$summary)#
l_pest.all = length(pest.all$mean) # last row of pest.all
d = (l_pest.all -1 - 8)/7 # 6 group: neg,prim[1:4],sec,log-like

pest.all<- pest.all[-c(1,l_pest.all-(0:7)),] # get rid of irrelevant rows
pest.all$IS <- rep(c("Neg_est","Prim1_est","Prim2_est","Prim3_est","Prim4_est","Second_est","log-like"),c(d,d,d,d,d,d,d))
pest.all$AgeGroup<- as.integer(rep(p_year$age,7))
pest.all$YEAR<- as.integer(rep(as.character(p_year$YEAR),7))
# pest.all$year<- as.factor(pest.all$year)
# function to get legend from one of the plots => for nicer looking of muliplot
get_legend<-function(myggplot){
    tmp <- ggplot_gtable(ggplot_build(myggplot))
    leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
    legend <- tmp$grobs[[leg]]
    return(legend)
}

#plot
list_year<- unique(pest.all$YEAR)
pp <- list()
for (idx.year in (1 : length(list_year)) ){

    pest <- dplyr::filter(pest.all,YEAR==list_year[idx.year])

    pp[[idx.year]] <- ggplot(pest,aes(x=AgeGroup,y=mean, fill=IS,color=IS))+ geom_line()+
              geom_ribbon(aes(ymin=X2.5., ymax = X97.5.),alpha=0.2)+
              ggtitle(paste(pest$YEAR[1]))+
              xlim(1,31)+
              ylim(0,1)+
              # scale_x_continuous(limits = c(1,31))+
              # scale_y_continuous((limits = c(0,1)))+
              theme(plot.title = element_text(color="darkblue", size=20, face="bold", hjust = 0.5),
                    axis.text = element_text(face = "bold"),
                    axis.title.y = element_blank())

}

legend <- get_legend(pp[[5]])

library(grid)
library(gridExtra)
grid.arrange(pp[[1]]+theme(legend.position = "none"),
             pp[[2]]+theme(legend.position = "none"),
             pp[[3]]+theme(legend.position = "none"),
             pp[[4]]+theme(legend.position = "none"),
             pp[[5]]+theme(legend.position = "none"),
             legend, nrow=3,ncol=2,
             bottom = textGrob(paste(" Estimated DENV proportion in",site, "_FoI constant "),gp=gpar(fontsize=20,fontface="bold"))) # all plot in one page

Fitting data

Loading real data

Regarding fitting data, the number of samples in each age group is quite small, I gathered samples in groups of 5 years old rather than those of 1. Therefore, there are 6 groups in total: [1-5), [5-10),[10-15), [15-20), [20-25), [25-31)

# CI95%
library(DescTools)
library(data.table)

pop1<- data[which(!is.na(data$AGE_MIN)&is.na(data$PanbioUnit)),]# cross out acute and ELISA samples
pop1$Site <- substr(pop1$sampleID,1,2)

##Agegroup_5years : 6 groups

pop1$AgeGroup <- floor(pop1$Age/5) + 1
pop1$AgeGroup[which(pop1$AgeGroup == 7)]<- 6
pop1$AgeGroup <- factor(
  pop1$AgeGroup,
  labels = c("[1-5)","[5-10)","[10-15)","[15-20)","[20-25)","[25-31)"))

p13CI.all=dplyr::filter(pop1,Site==paste(site))%>%group_by(AgeGroup,YEAR)%>%dplyr::count(predictedIS)%>%spread(predictedIS,n)
setnames(p13CI.all, old="Neg", new="Negative")

# Assign NA as 0:

for (idx in 3:5){
  idx.na <- which(is.na(p13CI.all[[idx]]))
  p13CI.all[[idx]][idx.na] <- 0
}

p13CI.all<- tidyr::gather(p13CI.all,"IS","n",3:5)# gather 3 variables (Neg,Prim,Secondary) into 1 variables called "IS"

p13CI.all<- p13CI.all[order(p13CI.all$YEAR,p13CI.all$AgeGroup),]# sort data by year

p13CI.all<- dplyr::mutate(p13CI.all,est=0,lwr.ci=0,upr.ci=0)# create cols for importing result from multinomCI.

for ( i in seq(1,length(p13CI.all$AgeGroup),by=3)){# every 3 rows ,do....
  p13CI.all[i:(i+2),5:7] <- MultinomCI(c(p13CI.all$n[i:(i+2)]))
}

# # plot
# for (idx.year in ( 1 : length(list_year))){
#   p13CI <- dplyr::filter(p13CI.all,YEAR==list_year[idx.year])
#   print(ggplot(p13CI, aes(x=AgeGroup,y=est, fill=IS,color=IS))+ geom_point()+
#           ggtitle(paste("Proportions",site,p13CI$YEAR[1]))+
#           theme(plot.title = element_text(color="darkblue", size=20, face="bold", hjust = 0.5))+
#           geom_errorbar(aes(ymax = upr.ci, ymin = lwr.ci), width = .25, position=position_dodge(0.2)))
# 
# }

Seperated figures

# # actual proportion
# p<- p13CI.all
# p<- p[order(p$IS),]
# 
# # #create function for substr assigning age group as one value, this create the same age structure with p1 data frame below. This function has run in previous chunk => no need re-run in this chunk!
# #
# # my_substr <- function(s){
# #   if (nchar(s) == 5){
# #     t <- substr(s, 4, 4)
# #   }else{
# #     if (nchar(s) == 6){
# #       t <- substr(s, 4, 5)
# #     }else{
# #       t <- substr(s, 5, 6)
# #     }
# #   }
# #   return(t)
# # }
# 
# #
# p$AgeGroup<- apply(p[,1],1,my_substr)
# p <- p[,-4]
# 
# # estimated proportion
# p1 <- pest.all[,c("AgeGroup","YEAR","IS","mean","X2.5.","X97.5.")]
# 
# colnames(p1)<- c("AgeGroup","YEAR","IS","est","lwr.ci","upr.ci")
# 
# pc.all<- rbind(data.frame(p), data.frame(p1))
# 
# 
# pc.all$AgeGroup <- as.numeric(pc.all$AgeGroup)
# 
# #plot negatives
# for ( idx.year in (1:length(list_year))){
# 
#   pc <- dplyr::filter(pc.all,YEAR==list_year[idx.year])
# 
#   print(
#     ggplot(pc%>%filter(IS=="Neg_est"|IS=="Negative"),
#            aes(x=AgeGroup,y=est, fill=IS, color=IS))+
#       geom_point()+
#       xlim (1,31)+
#       ylim (0,1)+
#       ggtitle(paste("Neg_FOI_constant",site,pc$YEAR[1]))+
#       theme(plot.title = element_text(color="darkblue", size=20, face="bold", hjust = 0.5),
#             axis.text = element_text(face = "bold"))+
#       geom_errorbar(aes(ymax = upr.ci, ymin = lwr.ci), width = .25,position = position_dodge(0.5))# move CI to the left or the right a bit
#   )
# }
# 
# #plot primary
# for ( idx.year in (1:length(list_year))){
# 
#   pc <- dplyr::filter(pc.all,YEAR==list_year[idx.year])
# 
#   print(
#     ggplot(pc%>%filter(IS=="Prim_est"|IS=="Primary"),
#            aes(x=AgeGroup,y=est, fill=IS, color=IS))+
#       geom_point()+
#       xlim (1,31)+
#       ylim (0,1)+
#       ggtitle(paste("Prim_FOI_constant",site,pc$YEAR[1]))+
#       theme(plot.title = element_text(color="darkblue", size=20, face="bold", hjust = 0.5),
#             axis.text = element_text(face = "bold"))+
#       geom_errorbar(aes(ymax = upr.ci, ymin = lwr.ci), width = .25,position = position_dodge(0.5))# move CI to the left or the right a bit
#   )
# }
# 
# #plot secondary
# for ( idx.year in (1:length(list_year))){
# 
#   pc <- dplyr::filter(pc.all,YEAR==list_year[idx.year])
# 
#   print(
#     ggplot(pc%>%filter(IS=="Second_est"|IS=="Secondary"),
#            aes(x=AgeGroup,y=est, fill=IS, color=IS))+
#       geom_point()+
#       xlim (1,31)+
#       ylim (0,1)+
#       ggtitle(paste("Sec_FOI_constant",site,pc$YEAR[1]))+
#       theme(plot.title = element_text(color="darkblue", size=20, face="bold", hjust = 0.5),
#             axis.text = element_text(face = "bold"))+
#       geom_errorbar(aes(ymax = upr.ci, ymin = lwr.ci), width = .25,position = position_dodge(0.5))# move CI to the left or the right a bit
#   )
# }
# 

all yrs in one plot

p<- p13CI.all
p<- p[order(p$IS),]

# #create function for substr assigning age group as one value, this create the same age structure with p1 data frame below. This function has run in previous chunk => no need re-run in this chunk!
# 
# my_substr <- function(s){
#   if (nchar(s) == 5){
#     t <- substr(s, 4, 4)
#   }else{
#     if (nchar(s) == 6){
#       t <- substr(s, 4, 5)
#     }else{
#       t <- substr(s, 5, 6)
#     }
#   }
#   return(t)
# }

#
p$AgeGroup<- apply(p[,1],1,my_substr)
p <- p[,-4]

# estimated proportion
p1 <- pest.all[,c("AgeGroup","YEAR","IS","mean","X2.5.","X97.5.")]

colnames(p1)<- c("AgeGroup","YEAR","IS","est","lwr.ci","upr.ci")

pc.all<- rbind(data.frame(p), data.frame(p1))
pc.all$AgeGroup <- as.numeric(pc.all$AgeGroup)

# # function to get legend from one of the plots => for nicer looking of muliplot => runned in chunk 12
# get_legend<-function(myggplot){
#     tmp <- ggplot_gtable(ggplot_build(myggplot))
#     leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
#     legend <- tmp$grobs[[leg]]
#     return(legend)
#     }

#plot negatives

neg <- list() # create empty list for input graphs in for loop
for ( idx.year in (1:length(list_year))){
    pc <- dplyr::filter(pc.all,YEAR==list_year[idx.year])

    neg[[idx.year]] <- ggplot(pc%>%filter(IS=="Neg_est"|IS=="Negative"),
                              aes(x=AgeGroup,y=est, fill=IS, color=IS))+
        geom_point()+
        xlim (1,31)+
        ylim (0,1)+
        ggtitle(paste(pc$YEAR[1]))+
        theme(plot.title = element_text(color="darkblue", size=20, face="bold", hjust = 0.5),
              axis.text = element_text(face = "bold"))+

        geom_errorbar(aes(ymax = upr.ci, ymin = lwr.ci), width = .25,position =         position_dodge(0.5))# move CI to the left or the right a bit 

}

legend <- get_legend(neg[[5]])

library(grid)
library(gridExtra)
negplot<-grid.arrange(neg[[1]]+theme(legend.position = "none"),
             neg[[2]]+theme(legend.position = "none"),
             neg[[3]]+theme(legend.position = "none"),
             neg[[4]]+theme(legend.position = "none"),
             neg[[5]]+theme(legend.position = "none"),
             legend, nrow=3,ncol=2,
             bottom = textGrob(paste("DENV negatives in",site, "_FoI constant",dataInput),gp=gpar(fontsize=20,fontface="bold"))) # all plot in one page 

# save the figure
ggsave(filename=paste("~/Desktop/PMA analysis results/FoI/Constant/fits/Neg_data fit",site, "_FoI constant",dataInput,".png"),plot=negplot)

#plot primary
prim <- list()
for ( idx.year in (1:length(list_year))){

    pc <- dplyr::filter(pc.all,YEAR==list_year[idx.year])

    prim[[idx.year]]<- ggplot(pc%>%filter(IS=="Prim_est"|IS=="Primary"),
                              aes(x=AgeGroup,y=est, fill=IS, color=IS))+
        geom_point()+
        xlim (1,31)+
        ylim (0,1)+
        ggtitle(paste(pc$YEAR[1]))+
        theme(plot.title = element_text(color="darkblue", size=20, face="bold", hjust = 0.5),
              axis.text = element_text(face = "bold"))+

        geom_errorbar(aes(ymax = upr.ci, ymin = lwr.ci), width = .25,position =         position_dodge(0.5))# move CI to the left or the right a bit 

}

legend <- get_legend(prim[[5]])
primplot <- grid.arrange(prim[[1]]+theme(legend.position = "none"),
             prim[[2]]+theme(legend.position = "none"),
             prim[[3]]+theme(legend.position = "none"),
             prim[[4]]+theme(legend.position = "none"),
             prim[[5]]+theme(legend.position = "none"),
             legend, nrow=3,ncol=2,
             bottom = textGrob(paste("DENV primary infection in",site, "_FoI constant ",dataInput),gp=gpar(fontsize=20,fontface="bold"))) # all plot in one page 

# save the figure
ggsave(filename=paste("~/Desktop/PMA analysis results/FoI/Constant/fits/Prim_data fit",site, "_FoI constant",dataInput,".png"),plot=primplot)

#plot secondary
sec <- list()
for ( idx.year in (1:length(list_year))){

    pc <- dplyr::filter(pc.all,YEAR==list_year[idx.year])


    sec[[idx.year]] <- ggplot(pc%>%filter(IS=="Second_est"|IS=="Secondary"),
                              aes(x=AgeGroup,y=est, fill=IS, color=IS))+
        geom_point()+
        xlim (1,31)+
        ylim (0,1)+
        ggtitle(paste(pc$YEAR[1]))+
        theme(plot.title = element_text(color="darkblue", size=20, face="bold", hjust = 0.5),
              axis.text = element_text(face = "bold"))+

        geom_errorbar(aes(ymax = upr.ci, ymin = lwr.ci), width = .25,position =         position_dodge(0.5))# move CI to the left or the right a bit 

}

legend <- get_legend(sec[[5]])
secplot <- grid.arrange(sec[[1]]+theme(legend.position = "none"),
             sec[[2]]+theme(legend.position = "none"),
             sec[[3]]+theme(legend.position = "none"),
             sec[[4]]+theme(legend.position = "none"),
             sec[[5]]+theme(legend.position = "none"),
             legend, nrow=3,ncol=2,
             bottom = textGrob(paste("DENV secondary infection in",site, "_FoI constant",dataInput),gp=gpar(fontsize=20,fontface="bold"))) # all plot in one page 

# save the figure
ggsave(filename=paste("~/Desktop/PMA analysis results/FoI/Constant/fits/Sec_data fit",site, "_FoI constant",dataInput,".png"),plot=secplot)

Combining all lamda from different model for comparison

for(i in (1: 6)){
  site_vec <- c("HC","HC","HC","KH","KH","KH")
  data_vec <- c("C1D1", "C2D1", "C3D1","C1D1", "C2D1", "C3D1")

  site <- site_vec[i]
  dataInput <-data_vec[i]
  #Loading stored simulation results
  fit <- readRDS(paste("~/Desktop/PMA analysis results/FoI/Constant/rds/FOI constant",site,dataInput,".rds"))# Loading the fit object (no need run over again)
  posterior<-rstan::extract(fit)

  p <- data.frame(lambda = posterior$lambda)
  p$year <- rep("FOI")

  p[i]<-ggplot(p, aes(x=year,y=lambda)) + geom_violin()+
    ggtitle(paste(site,dataInput))+
    coord_cartesian(ylim=c (0.0,0.03))+
    theme(plot.title = element_text(hjust=0.5,face="bold",size=12,colour = "blue"),
          axis.title.y = element_blank(),
          axis.title.x = element_blank(),
          axis.text.x = element_blank(),
          axis.text = element_text(face = "bold", angle = 0, hjust = 1))+
    # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
    # geom_boxplot(width=0.1)+# add median and quartile
    stat_summary(fun.data=mean_sdl,
                 geom="pointrange", color="red")#Add mean and standard deviation
}

library(gridExtra)
library(grid)
lamb<- grid.arrange(p1,p2,p3,p4,p5,p6,nrow=2,ncol=3,bottom=textGrob("Posterior lambda distribution_constant",gp=gpar(fontsize=20,font=8,col="dark blue")))

# ggsave(filename ="Posterior lambda distribution_constant.png",plot=lamb )
ggsave(filename ="~/Desktop/PMA analysis results/FoI/Constant/lamda/Posterior lambda distribution_constant.png",plot=lamb )


phuonghtht/PMA_dengue_v2 documentation built on July 4, 2023, 9:57 p.m.